library(tidyverse) # data manipulation## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.1 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(cluster) # clustering algorithms
library(factoextra) # clustering algorithms & visualization## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(gProfileR)load("~/Documents/MiGASti/Databases/df_num_scale_LOGFC1stim.Rdata")
load("~/Documents/MiGASti/Databases/Kmeans1204gsupport.Rdata")
df <- df_num_scale
#scale data:As we don?t want the clustering algorithm to depend to an arbitrary variable unit, we start by scaling/standardizing
df3 <- scale(df)set.seed(123)
fviz_nbclust(df3, kmeans, method = "wss")fviz_nbclust(df3, kmeans, method = "silhouette")# compute gap statistic
set.seed(123)
gap_stat <- clusGap(df3, FUN = kmeans, nstart = 25,
K.max = 10, B = 50)
# Print the result
print(gap_stat, method = "firstmax")## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = df3, FUNcluster = kmeans, K.max = 10, B = 50, nstart = 25)
## B=50 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
## --> Number of clusters (method 'firstmax'): 2
## logW E.logW gap SE.sim
## [1,] 6.943107 7.882240 0.9391329 0.006428239
## [2,] 6.809108 7.755145 0.9460373 0.006539340
## [3,] 6.735218 7.677794 0.9425759 0.006013863
## [4,] 6.667009 7.619304 0.9522946 0.006075357
## [5,] 6.593164 7.582524 0.9893593 0.005630834
## [6,] 6.542798 7.550159 1.0073609 0.005045045
## [7,] 6.492002 7.522901 1.0308986 0.005105812
## [8,] 6.453713 7.497541 1.0438275 0.004866291
## [9,] 6.420197 7.476286 1.0560882 0.004774224
## [10,] 6.402617 7.456369 1.0537526 0.004633696
fviz_gap_stat(gap_stat)k3 <- kmeans(df3, centers = 3, nstart = 25)
str(k3)## List of 9
## $ cluster : int [1:1204] 3 3 3 2 2 3 3 3 3 3 ...
## $ centers : num [1:3, 1:7] 2.1299 0.0598 -0.3688 -0.1881 0.5394 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:3] "1" "2" "3"
## .. ..$ : chr [1:7] "LPS" "IFNy" "IL4" "R848" ...
## $ totss : num 8421
## $ withinss : num [1:3] 716 1899 3005
## $ tot.withinss: num 5619
## $ betweenss : num 2802
## $ size : int [1:3] 118 348 738
## $ iter : int 3
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
fviz_cluster(k3, data = df3)k5 <- kmeans(df3, centers = 5, nstart = 25)
str(k5)## List of 9
## $ cluster : int [1:1204] 5 2 5 3 1 2 2 2 2 5 ...
## $ centers : num [1:5, 1:7] -0.0477 -0.7981 0.1702 2.1663 -0.0189 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:5] "1" "2" "3" "4" ...
## .. ..$ : chr [1:7] "LPS" "IFNy" "IL4" "R848" ...
## $ totss : num 8421
## $ withinss : num [1:5] 573 999 428 674 1529
## $ tot.withinss: num 4204
## $ betweenss : num 4217
## $ size : int [1:5] 226 303 86 114 475
## $ iter : int 5
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
fviz_cluster(k5, data = df3)k2 <- kmeans(df3, centers = 2, nstart = 25)
str(k2)## List of 9
## $ cluster : int [1:1204] 1 1 2 1 2 1 1 1 1 1 ...
## $ centers : num [1:2, 1:7] -0.3417 0.7437 -0.0193 0.0421 0.2855 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "1" "2"
## .. ..$ : chr [1:7] "LPS" "IFNy" "IL4" "R848" ...
## $ totss : num 8421
## $ withinss : num [1:2] 4005 2531
## $ tot.withinss: num 6536
## $ betweenss : num 1885
## $ size : int [1:2] 825 379
## $ iter : int 1
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
fviz_cluster(k2, data = df3)#first run
k <- kmeans(df3, centers = 2, nstart = 25)
kmeans <- k$cluster
kmean_download <- as.data.frame(kmeans)
kmeanscor <- cbind(kmean_download, DF_log2FC)
gencode_30 <- read.delim("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
kmeanscor1 = merge(kmeanscor, gencode_30, by="ensembl")
#second run
k <- kmeans(df3, centers = 2, nstart = 25)
kmeans <- k$cluster
kmean_download <- as.data.frame(kmeans)
kmeanscor <- cbind(kmean_download, DF_log2FC)
kmeanscor2 = merge(kmeanscor, gencode_30, by="ensembl")
res <- cor.test(kmeanscor1$kmeans, kmeanscor2$kmeans, method = "pearson")
res ##
## Pearson's product-moment correlation
##
## data: kmeanscor1$kmeans and kmeanscor2$kmeans
## t = -Inf, df = 1202, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -1 -1
## sample estimates:
## cor
## -1
cluster1 <- kmeanscor1[kmeanscor1$kmeans == 1,]
cluster2 <- kmeanscor1[kmeanscor1$kmeans == 2,]
gprofiler_results_1 <- gprofiler(query = cluster1$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_2 <- gprofiler(query = cluster2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
head(gprofiler_results_1$term.name)## [1] "peptidyl-tyrosine modification"
## [2] "nucleobase-containing small molecule catabolic process"
## [3] "odontoblast differentiation"
## [4] "regulation of phagocytosis"
## [5] "positive regulation of phagocytosis"
## [6] "circulatory system process"
head(gprofiler_results_2$term.name)## [1] "response to corticosteroid"
## [2] "response to glucocorticoid"
## [3] "neutrophil differentiation"
## [4] "response to stimulus"
## [5] "response to chemical"
## [6] "response to oxygen-containing compound"
cluster1_2 <- kmeanscor2[kmeanscor2$kmeans == 1,]
cluster2_2 <- kmeanscor2[kmeanscor2$kmeans == 2,]
gprofiler_results_1_2 <- gprofiler(query = cluster1_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_2_2 <- gprofiler(query = cluster2_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
head(gprofiler_results_1_2$term.name)## [1] "regulation of protein oligomerization"
## [2] "positive regulation of protein oligomerization"
## [3] "sensory perception of sound"
## [4] "neuron projection arborization"
## [5] "regulation of neuron projection arborization"
## [6] "positive regulation of neuron projection arborization"
head(gprofiler_results_2_2$term.name)## [1] "neuron apoptotic process"
## [2] "nucleobase-containing small molecule catabolic process"
## [3] "cyclic-nucleotide-mediated signaling"
## [4] "negative regulation of coagulation"
## [5] "peptidyl-tyrosine autophosphorylation"
## [6] "cytosol to ER transport"
k4 <- kmeans(df3, centers = 4, nstart = 25)
str(k4)## List of 9
## $ cluster : int [1:1204] 4 4 4 2 1 4 4 4 4 4 ...
## $ centers : num [1:4, 1:7] -0.0211 0.1702 2.1107 -0.3594 -0.1524 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:4] "1" "2" "3" "4"
## .. ..$ : chr [1:7] "LPS" "IFNy" "IL4" "R848" ...
## $ totss : num 8421
## $ withinss : num [1:4] 773 428 726 2908
## $ tot.withinss: num 4835
## $ betweenss : num 3586
## $ size : int [1:4] 261 86 121 736
## $ iter : int 3
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
fviz_cluster(k4, data = df3)k <- kmeans(df3, centers = 4, nstart = 25)
kmeans <- k$cluster
kmean_download <- as.data.frame(kmeans)
kmeanscor <- cbind(kmean_download, DF_log2FC)
kmeanscor1 = merge(kmeanscor, gencode_30, by="ensembl")
k <- kmeans(df3, centers = 4, nstart = 25)
kmeans <- k$cluster
kmean_download <- as.data.frame(kmeans)
kmeanscor <- cbind(kmean_download, DF_log2FC)
kmeanscor2 = merge(kmeanscor, gencode_30, by="ensembl")
res <- cor.test(kmeanscor1$kmeans, kmeanscor2$kmeans, method = "pearson")
res ##
## Pearson's product-moment correlation
##
## data: kmeanscor1$kmeans and kmeanscor2$kmeans
## t = 100.24, df = 1202, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9386973 0.9508033
## sample estimates:
## cor
## 0.9450735
cluster1 <- kmeanscor1[kmeanscor1$kmeans == 1,]
cluster2 <- kmeanscor1[kmeanscor1$kmeans == 2,]
cluster3 <- kmeanscor1[kmeanscor1$kmeans == 3,]
cluster4 <- kmeanscor1[kmeanscor1$kmeans == 4,]
gprofiler_results_1 <- gprofiler(query = cluster1$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_2 <- gprofiler(query = cluster2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_3 <- gprofiler(query = cluster3$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_4 <- gprofiler(query = cluster4$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
head(gprofiler_results_1$term.name)## [1] "regulation of transcription from RNA polymerase II promoter by histone modification"
## [2] "negative regulation of transcription by RNA polymerase II"
## [3] "negative regulation of transcription from RNA polymerase II promoter by histone modification"
## [4] "extracellular matrix organization"
## [5] "phosphatidylcholine biosynthetic process"
## [6] "purine-containing compound catabolic process"
head(gprofiler_results_2$term.name)## [1] "regulation of syncytium formation by plasma membrane fusion"
## [2] "regulation of myoblast fusion"
## [3] "lipoprotein metabolic process"
## [4] "T cell proliferation"
## [5] "regulation of T cell proliferation"
## [6] "regulation of cAMP-mediated signaling"
head(gprofiler_results_3$term.name)## [1] "regulation of endothelial cell development"
## [2] "regulation of establishment of endothelial barrier"
## [3] "hepatic immune response"
## [4] "ovulation"
## [5] "protein localization to cell leading edge"
## [6] "regulation of protein localization to cell leading edge"
head(gprofiler_results_4$term.name)## [1] "FIB-associated protein complex" "RHINO-TopBP1 complex"
## [3] "PICK1-GRIP1-GLUR2 complex" "GRASP1-GRIP1 complex"
## [5] "PlexinA1-NRP1-SEMA3A complex" "MICA-KLRK1-HCST complex"
cluster1_2 <- kmeanscor2[kmeanscor2$kmeans == 1,]
cluster2_2 <- kmeanscor2[kmeanscor2$kmeans == 2,]
cluster3_2 <- kmeanscor2[kmeanscor2$kmeans == 3,]
cluster4_2 <- kmeanscor2[kmeanscor2$kmeans == 4,]
gprofiler_results_1_2 <- gprofiler(query = cluster1_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_2_2 <- gprofiler(query = cluster2_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_3_2 <- gprofiler(query = cluster3_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_4_2 <- gprofiler(query = cluster4_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
head(gprofiler_results_1_2$term.name)## [1] "response to calcium ion"
## [2] "regulation of fatty acid oxidation"
## [3] "peptidyl-tyrosine modification"
## [4] "purine-containing compound catabolic process"
## [5] "negative regulation of plasminogen activation"
## [6] "regulation of transcription from RNA polymerase II promoter by histone modification"
head(gprofiler_results_2_2$term.name)## [1] "luteinizing hormone signaling pathway"
## [2] "response to iron ion starvation"
## [3] "protein localization to cell leading edge"
## [4] "regulation of protein localization to cell leading edge"
## [5] "negative regulation of protein localization to cell leading edge"
## [6] "pyrimidine nucleobase transport"
head(gprofiler_results_3_2$term.name)## [1] "regulation of apoptotic cell clearance"
## [2] "positive regulation of apoptotic cell clearance"
## [3] "STAT cascade"
## [4] "JAK-STAT cascade"
## [5] "negative regulation of cell adhesion"
## [6] "negative regulation of cell-cell adhesion"
head(gprofiler_results_4_2$term.name)## [1] "PICK1-GRIP1-GLUR2 complex" "PlexinA1-NRP1-SEMA3A complex"
## [3] "TFIIH transcription factor complex" "Nav1.2-beta2 complex"
## [5] "ITGA2-ITGB1-CD47 complex" "FIB-associated protein complex"
k6 <- kmeans(df3, centers = 6, nstart = 25)
str(k6)## List of 9
## $ cluster : int [1:1204] 6 5 6 4 2 6 5 5 5 6 ...
## $ centers : num [1:6, 1:7] -0.2668 -0.0329 2.3003 0.1708 -0.8236 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:6] "1" "2" "3" "4" ...
## .. ..$ : chr [1:7] "LPS" "IFNy" "IL4" "R848" ...
## $ totss : num 8421
## $ withinss : num [1:6] 574 548 591 435 785 ...
## $ tot.withinss: num 3803
## $ betweenss : num 4618
## $ size : int [1:6] 180 222 103 87 253 359
## $ iter : int 4
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
fviz_cluster(k6, data = df3)# first run
k <- kmeans(df3, centers = 6, nstart = 25)
kmeans <- k$cluster
kmean_download <- as.data.frame(kmeans)
kmeanscor <- cbind(kmean_download, DF_log2FC)
kmeanscor1 = merge(kmeanscor, gencode_30, by="ensembl")
# second run
k <- kmeans(df3, centers = 6, nstart = 25)
kmeans <- k$cluster
kmean_download <- as.data.frame(kmeans)
kmeanscor <- cbind(kmean_download, DF_log2FC)
kmeanscor2 = merge(kmeanscor, gencode_30, by="ensembl")
res <- cor.test(kmeanscor1$kmeans, kmeanscor2$kmeans, method = "pearson")
res ##
## Pearson's product-moment correlation
##
## data: kmeanscor1$kmeans and kmeanscor2$kmeans
## t = -2.6365, df = 1202, p-value = 0.008485
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.13175701 -0.01941359
## sample estimates:
## cor
## -0.07582593
cluster1 <- kmeanscor1[kmeanscor1$kmeans == 1,]
cluster2 <- kmeanscor1[kmeanscor1$kmeans == 2,]
cluster3 <- kmeanscor1[kmeanscor1$kmeans == 3,]
cluster4 <- kmeanscor1[kmeanscor1$kmeans == 4,]
cluster5 <- kmeanscor1[kmeanscor1$kmeans == 5,]
cluster6 <- kmeanscor1[kmeanscor1$kmeans == 6,]
gprofiler_results_1 <- gprofiler(query = cluster1$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_2 <- gprofiler(query = cluster2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_3 <- gprofiler(query = cluster3$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_4 <- gprofiler(query = cluster4$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_5 <- gprofiler(query = cluster5$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_6 <- gprofiler(query = cluster6$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
head(gprofiler_results_1$term.name)## [1] "protein deubiquitination"
## [2] "alpha-beta T cell activation"
## [3] "regulation of alpha-beta T cell activation"
## [4] "CD4-positive, alpha-beta T cell activation"
## [5] "alpha-beta T cell differentiation"
## [6] "CD4-positive, alpha-beta T cell differentiation"
head(gprofiler_results_2$term.name)## [1] "negative regulation of interleukin-12 production"
## [2] "cellular response to mycotoxin"
## [3] "regulation of fatty acid oxidation"
## [4] "negative regulation of type I interferon production"
## [5] "positive regulation of mitotic cell cycle phase transition"
## [6] "negative regulation of chronic inflammatory response"
head(gprofiler_results_3$term.name)## character(0)
head(gprofiler_results_4$term.name)## [1] "cellular developmental process" "cell differentiation"
## [3] "cell development" "cell activation"
## [5] "catalytic activity" "acylglycerol lipase activity"
head(gprofiler_results_5$term.name)## [1] "modulation by host of viral glycoprotein metabolic process"
## [2] "negative regulation by host of viral glycoprotein metabolic process"
## [3] "negative regulation of iron ion transport"
## [4] "regulation of iron ion transmembrane transport"
## [5] "negative regulation of iron ion transmembrane transport"
## [6] "purine nucleobase transmembrane transport"
head(gprofiler_results_6$term.name)## [1] "HDAC1-associated core complex cII" "9-1-1-RHINO complex"
## [3] "TFIIH transcription factor complex" "ITGA2-ITGB1-CHAD complex"
## [5] "GRASP1-GRIP1 complex" "ESR1-GRIP1 complex"
cluster1_2 <- kmeanscor2[kmeanscor2$kmeans == 1,]
cluster2_2 <- kmeanscor2[kmeanscor2$kmeans == 2,]
cluster3_2 <- kmeanscor2[kmeanscor2$kmeans == 3,]
cluster4_2 <- kmeanscor2[kmeanscor2$kmeans == 4,]
cluster5_2 <- kmeanscor2[kmeanscor2$kmeans == 5,]
cluster6_2 <- kmeanscor2[kmeanscor2$kmeans == 6,]
gprofiler_results_1_2 <- gprofiler(query = cluster1_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_2_2 <- gprofiler(query = cluster2_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_3_2 <- gprofiler(query = cluster3_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_4_2 <- gprofiler(query = cluster4_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_5_2 <- gprofiler(query = cluster5_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_6_2 <- gprofiler(query = cluster6_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
head(gprofiler_results_1_2$term.name)## [1] "fused antrum stage"
## [2] "ovarian cumulus expansion"
## [3] "ovulation"
## [4] "negative regulation of glucose transmembrane transport"
## [5] "positive regulation of leukocyte adhesion to arterial endothelial cell"
## [6] "negative regulation of branching involved in lung morphogenesis"
head(gprofiler_results_2_2$term.name)## [1] "STAT cascade"
## [2] "JAK-STAT cascade"
## [3] "post-translational protein modification"
## [4] "I-kappaB kinase/NF-kappaB signaling"
## [5] "regulation of I-kappaB kinase/NF-kappaB signaling"
## [6] "positive regulation of I-kappaB kinase/NF-kappaB signaling"
head(gprofiler_results_3_2$term.name)## character(0)
head(gprofiler_results_4_2$term.name)## [1] "PlexinA1-NRP1-SEMA3A complex" "ITGA2-ITGB1-CHAD complex"
## [3] "HDAC1-associated protein complex" "HDAC2-asscociated core complex"
## [5] "ITGA2-ITGB1-COL6A3 complex" "MICA-KLRK1-HCST complex"
head(gprofiler_results_5_2$term.name)## [1] "response to acid chemical"
## [2] "negative regulation of chronic inflammatory response"
## [3] "regulation of high voltage-gated calcium channel activity"
## [4] "response to inorganic substance"
## [5] "movement of cell or subcellular component"
## [6] "tissue development"
head(gprofiler_results_6_2$term.name)## [1] "cellular developmental process" "cell differentiation"
## [3] "cell development" "cell activation"
## [5] "catalytic activity" "acylglycerol lipase activity"
k7 <- kmeans(df3, centers = 7, nstart = 25)
str(k3)## List of 9
## $ cluster : int [1:1204] 3 3 3 2 2 3 3 3 3 3 ...
## $ centers : num [1:3, 1:7] 2.1299 0.0598 -0.3688 -0.1881 0.5394 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:3] "1" "2" "3"
## .. ..$ : chr [1:7] "LPS" "IFNy" "IL4" "R848" ...
## $ totss : num 8421
## $ withinss : num [1:3] 716 1899 3005
## $ tot.withinss: num 5619
## $ betweenss : num 2802
## $ size : int [1:3] 118 348 738
## $ iter : int 3
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
fviz_cluster(k7, data = df3)#first run
k <- kmeans(df3, centers = 7, nstart = 25)
kmeans <- k$cluster
kmean_download <- as.data.frame(kmeans)
kmeanscor <- cbind(kmean_download, DF_log2FC)
kmeanscor1 = merge(kmeanscor, gencode_30, by="ensembl")
#second run
k <- kmeans(df3, centers = 7, nstart = 25)
kmeans <- k$cluster
kmean_download <- as.data.frame(kmeans)
kmeanscor <- cbind(kmean_download, DF_log2FC)
kmeanscor2 = merge(kmeanscor, gencode_30, by="ensembl")
res <- cor.test(kmeanscor1$kmeans, kmeanscor2$kmeans, method = "pearson")
res ##
## Pearson's product-moment correlation
##
## data: kmeanscor1$kmeans and kmeanscor2$kmeans
## t = 6.1875, df = 1202, p-value = 8.368e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1203924 0.2299064
## sample estimates:
## cor
## 0.1756929
cluster1 <- kmeanscor1[kmeanscor1$kmeans == 1,]
cluster2 <- kmeanscor1[kmeanscor1$kmeans == 2,]
cluster3 <- kmeanscor1[kmeanscor1$kmeans == 3,]
cluster4 <- kmeanscor1[kmeanscor1$kmeans == 4,]
cluster5 <- kmeanscor1[kmeanscor1$kmeans == 5,]
cluster6 <- kmeanscor1[kmeanscor1$kmeans == 6,]
cluster7 <- kmeanscor1[kmeanscor1$kmeans == 7,]
gprofiler_results_1 <- gprofiler(query = cluster1$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_2 <- gprofiler(query = cluster2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_3 <- gprofiler(query = cluster3$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_4 <- gprofiler(query = cluster4$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_5 <- gprofiler(query = cluster5$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_6 <- gprofiler(query = cluster6$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_7 <- gprofiler(query = cluster7$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
head(gprofiler_results_1$term.name)## [1] "ammonium ion metabolic process" "response to stimulus"
## [3] "tube development" "tube morphogenesis"
## [5] "side of membrane" "external side of plasma membrane"
head(gprofiler_results_2$term.name)## [1] "alpha-beta T cell activation"
## [2] "regulation of alpha-beta T cell activation"
## [3] "CD4-positive, alpha-beta T cell activation"
## [4] "alpha-beta T cell differentiation"
## [5] "CD4-positive, alpha-beta T cell differentiation"
## [6] "leukocyte proliferation"
head(gprofiler_results_3$term.name)## [1] "HDAC1-associated protein complex"
## [2] "PlexinA1-NRP1-SEMA3A complex"
## [3] "PICK1-GRIP1-GLUR2 complex"
## [4] "NRD complex (Nucleosome remodeling and deacetylation complex)"
## [5] "FIF-FGR2 complex"
## [6] "ESR1-GRIP1 complex"
head(gprofiler_results_4$term.name)## [1] "localization of cell"
## [2] "regulation of locomotion"
## [3] "cell motility"
## [4] "regulation of cellular component movement"
## [5] "regulation of cell motility"
## [6] "regulation of cell migration"
head(gprofiler_results_5$term.name)## [1] "fused antrum stage" "ovarian cumulus expansion"
## [3] "collagen metabolic process" "collagen catabolic process"
## [5] "adenosine catabolic process" "xanthine metabolic process"
head(gprofiler_results_6$term.name)## [1] "regulation of interleukin-2 production"
## [2] "muscle cell proliferation"
## [3] "smooth muscle cell proliferation"
## [4] "regulation of smooth muscle cell proliferation"
## [5] "negative regulation of smooth muscle cell proliferation"
## [6] "response to wounding"
head(gprofiler_results_7$term.name)## [1] "cell-cell signaling by wnt"
## [2] "Wnt signaling pathway"
## [3] "response to calcium ion"
## [4] "reactive oxygen species metabolic process"
## [5] "phosphorus metabolic process"
## [6] "phosphate-containing compound metabolic process"
cluster1_2 <- kmeanscor2[kmeanscor2$kmeans == 1,]
cluster2_2 <- kmeanscor2[kmeanscor2$kmeans == 2,]
cluster3_2 <- kmeanscor2[kmeanscor2$kmeans == 3,]
cluster4_2 <- kmeanscor2[kmeanscor2$kmeans == 4,]
cluster5_2 <- kmeanscor2[kmeanscor2$kmeans == 5,]
cluster6_2 <- kmeanscor2[kmeanscor2$kmeans == 6,]
cluster7_2 <- kmeanscor2[kmeanscor2$kmeans == 7,]
gprofiler_results_1_2 <- gprofiler(query = cluster1_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_2_2 <- gprofiler(query = cluster2_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_3_2 <- gprofiler(query = cluster3_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_4_2 <- gprofiler(query = cluster4_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_5_2 <- gprofiler(query = cluster5_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_6_2 <- gprofiler(query = cluster6_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
gprofiler_results_7_2 <- gprofiler(query = cluster7_2$symbol, organism = "hsapiens",
ordered_query = F,
exclude_iea = F,
max_p_value = 0.05,
max_set_size = 0,
correction_method = "fdr",
hier_filtering = "none",
domain_size = "annotated")
head(gprofiler_results_1_2$term.name)## [1] "NRD complex (Nucleosome remodeling and deacetylation complex)"
## [2] "Nav1.2-beta2 complex"
## [3] "HDAC1-associated protein complex"
## [4] "FIF-FGR2 complex"
## [5] "PlexinA1-NRP1-SEMA3A complex"
## [6] "ER-alpha-GRIP1-c-Jun complex"
head(gprofiler_results_2_2$term.name)## [1] "tube development" "tube morphogenesis"
## [3] "ammonium ion metabolic process" "response to stimulus"
## [5] "side of membrane" "external side of plasma membrane"
head(gprofiler_results_3_2$term.name)## [1] "DNA replication initiation"
## [2] "phosphorus metabolic process"
## [3] "phosphate-containing compound metabolic process"
## [4] "regulation of phosphorus metabolic process"
## [5] "regulation of phosphate metabolic process"
## [6] "regulation of molecular function"
head(gprofiler_results_4_2$term.name)## [1] "swimming behavior"
## [2] "purine nucleobase transmembrane transport"
## [3] "positive regulation of smooth muscle contraction"
## [4] "protein localization to cell leading edge"
## [5] "regulation of protein localization to cell leading edge"
## [6] "negative regulation of protein localization to cell leading edge"
head(gprofiler_results_5_2$term.name)## [1] "alpha-beta T cell activation"
## [2] "regulation of alpha-beta T cell activation"
## [3] "CD4-positive, alpha-beta T cell activation"
## [4] "alpha-beta T cell differentiation"
## [5] "CD4-positive, alpha-beta T cell differentiation"
## [6] "leukocyte proliferation"
head(gprofiler_results_6_2$term.name)## [1] "parturition"
## [2] "chronic inflammatory response"
## [3] "negative regulation of chronic inflammatory response"
## [4] "muscle cell proliferation"
## [5] "smooth muscle cell proliferation"
## [6] "regulation of smooth muscle cell proliferation"
head(gprofiler_results_7_2$term.name)## [1] "localization of cell"
## [2] "cell motility"
## [3] "regulation of locomotion"
## [4] "regulation of cellular component movement"
## [5] "regulation of cell motility"
## [6] "regulation of cell migration"